0: Preparation
Defining the input and output files
Loading libraries
if (!require(knitr)) install.packages("knitr", dependencies = TRUE)
## Loading required package: knitr
library(knitr)
if (!require(ggplot2)) install.packages("ggplot2", dependencies = TRUE)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.3.2
library(ggplot2)
if (!require(scatterplot3d)) install.packages("scatterplot3d", dependencies = TRUE)
## Loading required package: scatterplot3d
## Warning: package 'scatterplot3d' was built under R version 4.3.1
library(scatterplot3d) # For generating the 3d plots
if (!require(RColorBrewer)) install.packages("RColorBrewer", dependencies = TRUE)
## Loading required package: RColorBrewer
library(RColorBrewer) # For generating a color palette
Latex formatting function
Standard Error Confidence interval function
Confidence level <=> konfidensgrad
# Function to calculate standard error and its confidence interval
standard_error_confidence_interval_fun <- function(observed_data, confidence_level = 0.95) {
# if ( nrow(observed_data) > 1 ) {
if ( length(observed_data) > 1 ) {
# Calculate standard error
n <- length(observed_data)
standard_deviation <- sd(observed_data)
standard_error <- standard_deviation / sqrt(n - 1)
# Calculate confidence interval based on standard error
alpha <- (1 - confidence_level) / 2
margin_of_error <- qnorm(1 - alpha) * standard_error
mean_estimate <- mean(observed_data)
# Calculate the percentiles for the confidence interval
confidence_interval_lower_bound <- mean_estimate - margin_of_error # 2.5th percentile (2σ)
confidence_interval_upper_bound <- mean_estimate + margin_of_error # 97.5th percentile (2σ)
# Return confidence interval
return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
} else {
return(c(NA,NA))
}
}
# # Function to calculate bootstrap confidence intervals
# standard_error_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
#
# # Function to calculate the mean for each bootstrap sample
# compute_bootstrap_mean_fun <- function(observed_data_urn) {
# bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
# bootstrap_estimate <- mean(bootstrap_dataset)
# return(bootstrap_estimate)
# }
#
# # Perform bootstrap resampling
# bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
#
# # Calculate the percentiles for the confidence interval
# alpha <- (1 - confidence_level) / 2
# confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
# confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
#
# # Return the confidence interval
# return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
# }
Causative variant
Variant fixation
Fixation probability
Fixation time
# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
# Find the index of the first row where the allele frequency is 0.9 or higher
fixation_index <- which(data$allele_frequency >= threshold)[1]
# Return the number of rows until fixation is reached
return(fixation_index - 1)
}
Summary - Variant fixation
## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/Causative_variant_Fixation_summary.txt"
| s=0.1 |
1.0 |
55 |
45 |
64 |
| s=0.2 |
41.7 |
46 |
37 |
58 |
| s=0.3 |
57.1 |
33 |
23 |
40 |
| s=0.4 |
76.9 |
29 |
21 |
37 |
| s=0.5 |
69.0 |
25 |
20 |
31 |
| s=0.6 |
74.1 |
20 |
17 |
23 |
| s=0.7 |
62.5 |
19 |
16 |
24 |
| s=0.8 |
83.3 |
17 |
14 |
21 |
Causative variant windows
Variant position
Window lengths
Expected Heterozygosity
## Point estimate H_e across all 20 simulations for s01_chr9 : 0.1409224 //n[1] "Bootstrap 95% Confidence Interval: [0.0925741889802892, 0.189270649535389]"
## Point estimate H_e across all 20 simulations for s02_chr9 : 0.1468773 //n[1] "Bootstrap 95% Confidence Interval: [0.112635475455286, 0.181119085299917]"
## Point estimate H_e across all 20 simulations for s03_chr9 : 0.1029798 //n[1] "Bootstrap 95% Confidence Interval: [0.0516602472515632, 0.154299390835809]"
## Point estimate H_e across all 20 simulations for s04_chr9 : 0.1187659 //n[1] "Bootstrap 95% Confidence Interval: [0.078310650686274, 0.159221155639888]"
## Point estimate H_e across all 20 simulations for s05_chr9 : 0.1375201 //n[1] "Bootstrap 95% Confidence Interval: [0.0970728581788342, 0.17796743001132]"
## Point estimate H_e across all 20 simulations for s06_chr9 : 0.07875692 //n[1] "Bootstrap 95% Confidence Interval: [0.0561528857574954, 0.101360963717184]"
## Point estimate H_e across all 20 simulations for s07_chr9 : 0.1270691 //n[1] "Bootstrap 95% Confidence Interval: [0.068746299435772, 0.185391996295458]"
## Point estimate H_e across all 20 simulations for s08_chr9 : 0.1242718 //n[1] "Bootstrap 95% Confidence Interval: [0.0746607385549174, 0.173882852083258]"
Summary - Causative variant windows
## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/Causative_variant_windows_summary.txt"
| s=0.1 |
0.72 |
0.55 |
0.90 |
77.2 |
68.4 |
86.0 |
23.3 |
99.5 |
78.9 |
70.3 |
87.5 |
0.141 |
0.093 |
0.189 |
| s=0.2 |
1.01 |
0.77 |
1.25 |
76.6 |
69.1 |
84.1 |
28.8 |
100.0 |
78.3 |
70.4 |
86.1 |
0.147 |
0.113 |
0.181 |
| s=0.3 |
1.26 |
1.00 |
1.53 |
90.9 |
84.6 |
97.2 |
37.7 |
100.0 |
91.8 |
85.5 |
98.1 |
0.103 |
0.052 |
0.154 |
| s=0.4 |
1.15 |
0.79 |
1.50 |
89.8 |
85.0 |
94.6 |
60.5 |
100.0 |
91.0 |
86.1 |
95.9 |
0.119 |
0.078 |
0.159 |
| s=0.5 |
1.38 |
1.02 |
1.74 |
92.0 |
86.6 |
97.4 |
43.3 |
100.0 |
92.8 |
87.5 |
98.1 |
0.138 |
0.097 |
0.178 |
| s=0.6 |
2.05 |
1.57 |
2.53 |
97.0 |
94.9 |
99.1 |
75.3 |
100.0 |
97.9 |
95.7 |
100.1 |
0.079 |
0.056 |
0.101 |
| s=0.7 |
2.10 |
1.54 |
2.67 |
93.7 |
87.1 |
100.4 |
34.0 |
100.0 |
94.5 |
88.1 |
100.9 |
0.127 |
0.069 |
0.185 |
| s=0.8 |
2.00 |
1.56 |
2.45 |
94.4 |
90.4 |
98.4 |
61.4 |
100.0 |
95.3 |
91.6 |
99.1 |
0.124 |
0.075 |
0.174 |
1: ROH-Frequency
1.1 : Autosome ROH-frequencies
1.1.1 : Empirical - ROH frequency
1.1.2: Neutral Model - ROH frequency
1.1.3: Selection Model
1.1.4 Summary - ROH-hotspot threshold
## ROH-hotspot selection testing results://n
## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/ROH-hotspot_threshold_comparison.txt"
| Neutral |
51.6 |
44.4 |
58.8 |
| Empirical |
55.4 |
NA |
NA |
| s=0.2 |
77.6 |
70.1 |
85.1 |
| s=0.1 |
80.3 |
75.1 |
85.5 |
| s=0.4 |
92.3 |
88.0 |
96.6 |
| s=0.3 |
92.9 |
88.0 |
97.8 |
| s=0.5 |
94.9 |
90.3 |
99.6 |
| s=0.7 |
96.5 |
93.2 |
99.8 |
| s=0.8 |
98.1 |
96.3 |
99.9 |
| s=0.6 |
98.4 |
96.6 |
100.2 |
1.2 ROH-hotspots - ROH Frequency and Length
2: Inbreeding coefficient
2.1 Empirical data

## Overall Average Avg_F_ROH for labrador_retriever : 0.2333818 //n
2.2 Neutral Model

## Point estimate F_ROH across all 20 simulations: 0.2314507 //n
## [1] "Bootstrap 95% Confidence Interval: [0.219002752647896, 0.243898726421872]"
2.3 Selection Model

## Point estimate F_ROH across all 20 simulations for selection_model_s01_chr9 : 0.2656486 //n[1] "Bootstrap 95% Confidence Interval: [0.252240623253689, 0.279056655816078]"

## Point estimate F_ROH across all 20 simulations for selection_model_s02_chr9 : 0.2777609 //n[1] "Bootstrap 95% Confidence Interval: [0.261878746605762, 0.29364312316168]"

## Point estimate F_ROH across all 20 simulations for selection_model_s03_chr9 : 0.3017125 //n[1] "Bootstrap 95% Confidence Interval: [0.284779672093352, 0.31864533255781]"

## Point estimate F_ROH across all 20 simulations for selection_model_s04_chr9 : 0.3109797 //n[1] "Bootstrap 95% Confidence Interval: [0.298039216034974, 0.323920221174329]"

## Point estimate F_ROH across all 20 simulations for selection_model_s05_chr9 : 0.3077886 //n[1] "Bootstrap 95% Confidence Interval: [0.28852077497684, 0.32705650874409]"

## Point estimate F_ROH across all 20 simulations for selection_model_s06_chr9 : 0.3611243 //n[1] "Bootstrap 95% Confidence Interval: [0.342370560473491, 0.379877960456742]"

## Point estimate F_ROH across all 20 simulations for selection_model_s07_chr9 : 0.3600298 //n[1] "Bootstrap 95% Confidence Interval: [0.343113084116087, 0.376946436814146]"

## Point estimate F_ROH across all 20 simulations for selection_model_s08_chr9 : 0.3408198 //n[1] "Bootstrap 95% Confidence Interval: [0.327834315751562, 0.353805223783321]"
2.4 F_ROH summary
## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/F_ROH_comparison.txt"
| Neutral |
0.231 |
0.219 |
0.244 |
| Empirical |
0.233 |
NA |
NA |
| s=0.1 |
0.266 |
0.252 |
0.279 |
| s=0.2 |
0.278 |
0.262 |
0.294 |
| s=0.3 |
0.302 |
0.285 |
0.319 |
| s=0.5 |
0.308 |
0.289 |
0.327 |
| s=0.4 |
0.311 |
0.298 |
0.324 |
| s=0.8 |
0.341 |
0.328 |
0.354 |
| s=0.7 |
0.360 |
0.343 |
0.377 |
| s=0.6 |
0.361 |
0.342 |
0.380 |
3: Expected Heterozygosity
3.1 Empirical data
3.2 Neutral Model
3.3 Selection Model
3.4 Genomewide Average H_e
| s=0.7 |
0.310 |
0.305 |
0.315 |
| s=0.6 |
0.311 |
0.305 |
0.316 |
| s=0.4 |
0.313 |
0.308 |
0.318 |
| s=0.8 |
0.316 |
0.310 |
0.321 |
| Empirical |
0.316 |
NA |
NA |
| s=0.1 |
0.317 |
0.312 |
0.322 |
| s=0.2 |
0.319 |
0.314 |
0.325 |
| s=0.5 |
0.320 |
0.314 |
0.327 |
| s=0.3 |
0.322 |
0.316 |
0.327 |
| Neutral |
0.330 |
0.325 |
0.335 |
3.5 Genomewide 5th percentile comparison - Expected Heterozygosity
Summary
## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/Expected_Heterozygosity_Summary.txt"
| s=0.6 |
0.119 |
0.108 |
0.130 |
| s=0.7 |
0.120 |
0.109 |
0.131 |
| s=0.8 |
0.131 |
0.120 |
0.142 |
| s=0.4 |
0.132 |
0.119 |
0.144 |
| s=0.5 |
0.136 |
0.122 |
0.151 |
| s=0.1 |
0.139 |
0.130 |
0.148 |
| s=0.2 |
0.139 |
0.126 |
0.152 |
| s=0.3 |
0.142 |
0.128 |
0.156 |
| Empirical |
0.151 |
NA |
NA |
| Neutral |
0.179 |
0.170 |
0.187 |
4: Summary
4.0 General comparison
4.0.1 ROH-hotspot threshold comparison
##
## ROH-hotspot threshold comparison between the different datasets
| Neutral |
51.6 |
44.4 |
58.8 |
| Empirical |
55.4 |
NA |
NA |
| s=0.2 |
77.6 |
70.1 |
85.1 |
| s=0.1 |
80.3 |
75.1 |
85.5 |
| s=0.4 |
92.3 |
88.0 |
96.6 |
| s=0.3 |
92.9 |
88.0 |
97.8 |
| s=0.5 |
94.9 |
90.3 |
99.6 |
| s=0.7 |
96.5 |
93.2 |
99.8 |
| s=0.8 |
98.1 |
96.3 |
99.9 |
| s=0.6 |
98.4 |
96.6 |
100.2 |
4.0.2 F_ROH comparison
## Models where empirical F_ROH is within CI:
## [1] "Neutral"
##
## Models where empirical F_ROH is outside CI:
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## png
## 2
| Neutral |
0.231 |
0.219 |
0.244 |
| Empirical |
0.233 |
NA |
NA |
| s=0.1 |
0.266 |
0.252 |
0.279 |
| s=0.2 |
0.278 |
0.262 |
0.294 |
| s=0.3 |
0.302 |
0.285 |
0.319 |
| s=0.5 |
0.308 |
0.289 |
0.327 |
| s=0.4 |
0.311 |
0.298 |
0.324 |
| s=0.8 |
0.341 |
0.328 |
0.354 |
| s=0.7 |
0.360 |
0.343 |
0.377 |
| s=0.6 |
0.361 |
0.342 |
0.380 |
4.1: Hotspot comparison
4.1.1: Selection test (Sweep test)
## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the lower confidence interval of the fifth percentile of the neutral model are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0.16993"
| hotspot_chr28_window_1 |
Yes |
0.118 |
| hotspot_chr1_window_1 |
Yes |
0.144 |
| hotspot_chr24_window_1 |
No |
0.186 |
| hotspot_chr13_window_1 |
No |
0.190 |
| hotspot_chr11_window_2 |
No |
0.205 |
| hotspot_chr15_window_1 |
No |
0.223 |
| hotspot_chr6_window_1 |
No |
0.232 |
| hotspot_chr11_window_1 |
No |
0.233 |
| hotspot_chr8_window_1 |
No |
0.307 |
## [1] "C:/Users/jonat/GitHub/Computational-modelling-of-genomic-inbreeding-and-roh-islands-in-extremely-small-populations/resH17_1_AC/Pipeline_results/ROH_hotspots_Selection_testing_neutral_model_H_E_threshold_0.17.csv.txt"
## [1] "ROH-hotspots under selection:"
| hotspot_chr28_window_1 |
Yes |
0.118 |
| hotspot_chr1_window_1 |
Yes |
0.144 |
4.1.2: Selection Strength Test (Sweep test)
Hotspot - Causative Window - Comparison
| s=0.8 |
2.00 |
1.56 |
2.45 |
94.4 |
90.4 |
98.4 |
0.124 |
0.075 |
0.174 |
| s=0.7 |
2.10 |
1.54 |
2.67 |
93.7 |
87.1 |
100.4 |
0.127 |
0.069 |
0.185 |
| s=0.6 |
2.05 |
1.57 |
2.53 |
97.0 |
94.9 |
99.1 |
0.079 |
0.056 |
0.101 |
| s=0.5 |
1.38 |
1.02 |
1.74 |
92.0 |
86.6 |
97.4 |
0.138 |
0.097 |
0.178 |
| s=0.4 |
1.15 |
0.79 |
1.50 |
89.8 |
85.0 |
94.6 |
0.119 |
0.078 |
0.159 |
| s=0.3 |
1.26 |
1.00 |
1.53 |
90.9 |
84.6 |
97.2 |
0.103 |
0.052 |
0.154 |
| s=0.2 |
1.01 |
0.77 |
1.25 |
76.6 |
69.1 |
84.1 |
0.147 |
0.113 |
0.181 |
| s=0.1 |
0.72 |
0.55 |
0.90 |
77.2 |
68.4 |
86.0 |
0.141 |
0.093 |
0.189 |
| Hotspot_chr28_window_1 |
1.40 |
NA |
NA |
75.0 |
NA |
NA |
0.118 |
NA |
NA |
| Hotspot_chr1_window_1 |
0.70 |
NA |
NA |
56.2 |
NA |
NA |
0.144 |
NA |
NA |
Hotspot, Causativa window - 3D plot-Comparison
setwd(output_dir)
# plot_title <- paste("Length And Average ROH % Comparison - ROH Hotspots & Causative Windows")
plot_title <- paste("ROH Hotspot(s) & Causative Windows comparison")
# Modify the labels by removing "Hotspot_" prefix and "_window" suffix from hotspot models
Hotspots_and_Causative_windows_comparison_sorted$Label <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
gsub("Hotspot_|_window", "", Hotspots_and_Causative_windows_comparison_sorted$Model),
Hotspots_and_Causative_windows_comparison_sorted$Model
)
# Removing the "s=" part from the selection coefficients for the plot display
Hotspots_and_Causative_windows_comparison_sorted$Label <- gsub("^s=", "", Hotspots_and_Causative_windows_comparison_sorted$Label)
# Create a new column for identifying the type
Hotspots_and_Causative_windows_comparison_sorted$Type <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
"Hotspot",
"Selection Coefficient"
)
# Generate a color palette for the hotspots
hotspot_models <- unique(Hotspots_and_Causative_windows_comparison_sorted$Model[Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot"])
num_hotspots <- length(hotspot_models)
# Choose the color palette based on the number of hotspots
if (num_hotspots == 2) {
color_palette_name <- "Set2"
} else {
color_palette_name <- "Set3"
}
# Get the colors for the hotspots
hotspot_colors <- setNames(brewer.pal(n = num_hotspots, name = color_palette_name), hotspot_models)
## Warning in brewer.pal(n = num_hotspots, name = color_palette_name): minimal value for n is 3, returning requested palette with 3 different levels
# Assign colors to each point
Hotspots_and_Causative_windows_comparison_sorted$Color <- ifelse(
Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot",
hotspot_colors[Hotspots_and_Causative_windows_comparison_sorted$Model],
"blue"
)
x_value <- Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq
x_axis_label <- "Avg ROH-frequency (%)"
y_value <- Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
y_axis_label <- "Length (Mb)"
z_value <- Hotspots_and_Causative_windows_comparison_sorted$H_e
z_axis_label <- "Avg H_e"
# x_value <- Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
# x_axis_label <- "Length (Mb)"
# y_value <- Hotspots_and_Causative_windows_comparison_sorted$H_e
# y_axis_label <- "Avg H_e"
# z_value <- Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq
# z_axis_label <- "Avg ROH-frequency (%)"
x_value <- Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq
x_axis_label <- "Avg ROH-frequency (%)"
y_value <- Hotspots_and_Causative_windows_comparison_sorted$H_e
y_axis_label <- "Avg H_e"
z_value <- Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
z_axis_label <- "Length (Mb)"
# x_value <- Hotspots_and_Causative_windows_comparison_sorted$H_e
# x_axis_label <- "Avg H_e"
# y_value <- Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq
# y_axis_label <- "Avg ROH-frequency (%)"
# z_value <- Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb
# z_axis_label <- "Length (Mb)"
# # Create and save the 3D scatter plot as a PNG file
# png(filename = "3dplot_Hotspot_Causative_Window_Comparison.png", width = 1920, height = 1080, res = 300)
# png(filename = "3dplot_Hotspot_Causative_Window_Comparison.png",width = 800, height = 600, res = 300)
# Create the 3D scatter plot
s3d <- scatterplot3d(
x_value ,
y_value,
z_value,
color = Hotspots_and_Causative_windows_comparison_sorted$Color,
pch = 19, # Solid circle
xlab = x_axis_label,
ylab = y_axis_label,
zlab = z_axis_label,
main = plot_title
)
# Add shadows on the x-y plane (z = 0)
s3d$points3d(
x_value,
y_value,
rep(0, nrow(Hotspots_and_Causative_windows_comparison_sorted)), # Set z to 0 for shadow
col = adjustcolor(Hotspots_and_Causative_windows_comparison_sorted$Color, alpha.f = 0.3), # Semi-transparent shadows
pch = 19
)
# Convert coordinates for adding labels
s3d.coords <- s3d$xyz.convert(
x_value,
y_value,
z_value
)
# Add labels to the original points
text(s3d.coords$x, s3d.coords$y,
labels = Hotspots_and_Causative_windows_comparison_sorted$Label,
pos = 3, cex = 0.5) # pos=3 means above

# # Close the graphics device
# dev.off()
# Sort the data frame based on average fixation time, in descending order
kable(Hotspots_and_Causative_windows_comparison[order(-Hotspots_and_Causative_windows_comparison$ROH_Freq), ])
| 8 |
s=0.6 |
2.05 |
1.57 |
2.53 |
97.0 |
94.9 |
99.1 |
0.079 |
0.056 |
0.101 |
| 10 |
s=0.8 |
2.00 |
1.56 |
2.45 |
94.4 |
90.4 |
98.4 |
0.124 |
0.075 |
0.174 |
| 9 |
s=0.7 |
2.10 |
1.54 |
2.67 |
93.7 |
87.1 |
100.4 |
0.127 |
0.069 |
0.185 |
| 7 |
s=0.5 |
1.38 |
1.02 |
1.74 |
92.0 |
86.6 |
97.4 |
0.138 |
0.097 |
0.178 |
| 5 |
s=0.3 |
1.26 |
1.00 |
1.53 |
90.9 |
84.6 |
97.2 |
0.103 |
0.052 |
0.154 |
| 6 |
s=0.4 |
1.15 |
0.79 |
1.50 |
89.8 |
85.0 |
94.6 |
0.119 |
0.078 |
0.159 |
| 3 |
s=0.1 |
0.72 |
0.55 |
0.90 |
77.2 |
68.4 |
86.0 |
0.141 |
0.093 |
0.189 |
| 4 |
s=0.2 |
1.01 |
0.77 |
1.25 |
76.6 |
69.1 |
84.1 |
0.147 |
0.113 |
0.181 |
| 2 |
Hotspot_chr28_window_1 |
1.40 |
NA |
NA |
75.0 |
NA |
NA |
0.118 |
NA |
NA |
| 1 |
Hotspot_chr1_window_1 |
0.70 |
NA |
NA |
56.2 |
NA |
NA |
0.144 |
NA |
NA |
kable(Hotspots_and_Causative_windows_comparison[order(-Hotspots_and_Causative_windows_comparison$H_e), ])
| 4 |
s=0.2 |
1.01 |
0.77 |
1.25 |
76.6 |
69.1 |
84.1 |
0.147 |
0.113 |
0.181 |
| 1 |
Hotspot_chr1_window_1 |
0.70 |
NA |
NA |
56.2 |
NA |
NA |
0.144 |
NA |
NA |
| 3 |
s=0.1 |
0.72 |
0.55 |
0.90 |
77.2 |
68.4 |
86.0 |
0.141 |
0.093 |
0.189 |
| 7 |
s=0.5 |
1.38 |
1.02 |
1.74 |
92.0 |
86.6 |
97.4 |
0.138 |
0.097 |
0.178 |
| 9 |
s=0.7 |
2.10 |
1.54 |
2.67 |
93.7 |
87.1 |
100.4 |
0.127 |
0.069 |
0.185 |
| 10 |
s=0.8 |
2.00 |
1.56 |
2.45 |
94.4 |
90.4 |
98.4 |
0.124 |
0.075 |
0.174 |
| 6 |
s=0.4 |
1.15 |
0.79 |
1.50 |
89.8 |
85.0 |
94.6 |
0.119 |
0.078 |
0.159 |
| 2 |
Hotspot_chr28_window_1 |
1.40 |
NA |
NA |
75.0 |
NA |
NA |
0.118 |
NA |
NA |
| 5 |
s=0.3 |
1.26 |
1.00 |
1.53 |
90.9 |
84.6 |
97.2 |
0.103 |
0.052 |
0.154 |
| 8 |
s=0.6 |
2.05 |
1.57 |
2.53 |
97.0 |
94.9 |
99.1 |
0.079 |
0.056 |
0.101 |
Visualizing and scaling
# Min-max-scaling normalization function, that normalizes a vector of values
min_max_normalization_fun <- function(x) {
return((x - min(x)) / (max(x) - min(x)))
}
### Min max scaling ###
# Normalize Lengths_Mb
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb)
# Normalize ROH Frequency
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq)
# Normalize H_e
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$H_e)
# Modify the labels by removing "Hotspot_" prefix and "_window" suffix from hotspot models
Hotspots_and_Causative_windows_comparison_sorted$Label <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
gsub("Hotspot_|_window", "", Hotspots_and_Causative_windows_comparison_sorted$Model),
Hotspots_and_Causative_windows_comparison_sorted$Model
)
# Create a new column for identifying the type
Hotspots_and_Causative_windows_comparison_sorted$Type <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
"Hotspot",
"Selection Coefficient"
)
plot_title <- paste("Normalized ROH Hotspot(s) & Causative Windows comparison")
# Create the 3D scatter plot
s3d <- scatterplot3d(
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized,
color = ifelse(Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot", "red", "blue"),
pch = 19, # Solid circle
xlab = "Normalized Lengths",
ylab = "Normalized Mean ROH Frequency",
zlab = "Normalized H_e",
main = plot_title
)
# Add labels to the points
s3d.coords <- s3d$xyz.convert(
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized
)
text(s3d.coords$x, s3d.coords$y,
labels = Hotspots_and_Causative_windows_comparison_sorted$Label,
pos = 3, cex = 0.5) # pos=3 means above
# Visualization for Selection Strength estimation
5.1 Standard Error CI for H_e
## Models where empirical H_e is within CI for hotspot_chr28_window_1 :
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.7" "s=0.8"
##
## Models where empirical H_e is outside CI for hotspot_chr28_window_1 :
## [1] "s=0.6"
## Models where empirical H_e is within CI for hotspot_chr1_window_1 :
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.7" "s=0.8"
##
## Models where empirical H_e is outside CI for hotspot_chr1_window_1 :
## [1] "s=0.6"
5.2 Standard Error CI for Length
## TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## Models where empirical H_e is within CI for Hotspot_chr1_window_1 :
## [1] "s=0.1"
##
## Models where empirical H_e is outside CI for Hotspot_chr1_window_1 :
## [1] "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## FALSE FALSE TRUE TRUE TRUE FALSE FALSE FALSE
## Models where empirical H_e is within CI for Hotspot_chr28_window_1 :
## [1] "s=0.3" "s=0.4" "s=0.5"
##
## Models where empirical H_e is outside CI for Hotspot_chr28_window_1 :
## [1] "s=0.1" "s=0.2" "s=0.6" "s=0.7" "s=0.8"
5.3 Standard Error CI for ROH Freq
## Models where empirical H_e is within CI for Hotspot_chr1_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr1_window_1 :
## [1] "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"
## Models where empirical H_e is within CI for Hotspot_chr28_window_1 :
## [1] "s=0.1" "s=0.2"
##
## Models where empirical H_e is outside CI for Hotspot_chr28_window_1 :
## [1] "s=0.3" "s=0.4" "s=0.5" "s=0.6" "s=0.7" "s=0.8"